import pandas as pd
import json

# Report how many clip IDs are shared between three filename lists:
# the DESED JSON list, the "desed_in_audioset" TSV and the
# "train_unlabeled_od" label-mapping TSV.

desed_files = "/home/shaonian/SED/SED/data_eng/src/DESED_all_filenames.json"
# Use a context manager so the JSON file handle is closed deterministically
# instead of being left to the garbage collector.
with open(desed_files, "r") as desed_fp:
    desed = json.load(desed_fp)

# Reduce each entry to an ID: keep the last path component, take the text
# before the first "_", then drop the first character.
# NOTE(review): presumably the dropped character is a fixed filename prefix
# and the ID itself never contains "_" (otherwise it is truncated here) —
# confirm against the actual filename format.
desed = [x.split("/")[-1].split("_")[0][1:] for x in desed]
print(desed)

as_strong_od = pd.read_csv(
    "/nvmework1/shaonian/Datasets/AudioSet_strong/meta/desed_in_audioset.tsv",
    sep="\t",
)
as_od = pd.read_csv("./train_unlabeled_od_AudioSet_label_mapping.tsv", sep="\t")

# Text before the first "." with the first character dropped.
as_od = [x.split(".")[0][1:] for x in as_od["filename"].values]
# Last path component, text before the first "_". Unlike the two lists above,
# the first character is kept here.
# NOTE(review): assumes these filenames carry no one-character prefix — verify.
as_strong_od = [
    x.split("/")[-1].split("_")[0] for x in as_strong_od["filename"].values
]

# IDs present in both lists of each pair (duplicates collapse in the sets).
dup_filenames = set(as_strong_od) & set(as_od)
dup_desed_filenames = set(desed) & set(as_strong_od)

# The first two numbers of each line are list lengths (duplicates included);
# the third is the number of distinct shared IDs.
print(len(as_strong_od), len(as_od), len(dup_filenames))
print(len(desed), len(as_strong_od), len(dup_desed_filenames))

